#required packages
packages <- c('ggplot2', 'plyr', 'jsonlite', 'lme4', 'lsmeans', 'grid', 'gridExtra', 'png')
#load them all; invisible() suppresses lapply's printed list of attached packages
invisible(lapply(packages, library, character.only = TRUE))
## Loading required package: Matrix
## Loading required package: estimability
## [[1]]
## [1] "ggplot2" "stats" "graphics" "grDevices" "utils" "datasets"
## [7] "methods" "base"
##
## [[2]]
## [1] "plyr" "ggplot2" "stats" "graphics" "grDevices" "utils"
## [7] "datasets" "methods" "base"
##
## [[3]]
## [1] "jsonlite" "plyr" "ggplot2" "stats" "graphics"
## [6] "grDevices" "utils" "datasets" "methods" "base"
##
## [[4]]
## [1] "lme4" "Matrix" "jsonlite" "plyr" "ggplot2"
## [6] "stats" "graphics" "grDevices" "utils" "datasets"
## [11] "methods" "base"
##
## [[5]]
## [1] "lsmeans" "estimability" "lme4" "Matrix"
## [5] "jsonlite" "plyr" "ggplot2" "stats"
## [9] "graphics" "grDevices" "utils" "datasets"
## [13] "methods" "base"
##
## [[6]]
## [1] "grid" "lsmeans" "estimability" "lme4"
## [5] "Matrix" "jsonlite" "plyr" "ggplot2"
## [9] "stats" "graphics" "grDevices" "utils"
## [13] "datasets" "methods" "base"
##
## [[7]]
## [1] "gridExtra" "grid" "lsmeans" "estimability"
## [5] "lme4" "Matrix" "jsonlite" "plyr"
## [9] "ggplot2" "stats" "graphics" "grDevices"
## [13] "utils" "datasets" "methods" "base"
##
## [[8]]
## [1] "png" "gridExtra" "grid" "lsmeans"
## [5] "estimability" "lme4" "Matrix" "jsonlite"
## [9] "plyr" "ggplot2" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods"
## [17] "base"
#path to the folder holding the per-participant json files ("pptNNN.json")
foldername<-"/home/hanshalbe/bamcp/"
#first entry plus successive differences: y[1] = x[1], y[i] = x[i] - x[i-1].
#Used to recover the pay-off per move from the cumulative counts as saved.
mydiff <- function(x) {
  #start from x so names/attributes and the first entry are preserved
  y <- x
  #guard: nothing to difference for length < 2
  #(the old 2:length(x) loop errored on length-1 input)
  if (length(x) > 1) {
    #vectorized successive differences replace the element-wise loop
    y[-1] <- diff(x)
  }
  return(y)
}
#standard error of the mean, used for the error bars.
#na.rm = FALSE keeps the original behavior (NA in -> NA out); pass TRUE to drop NAs.
se <- function(x, na.rm = FALSE) {
  n <- if (na.rm) sum(!is.na(x)) else length(x)
  sd(x, na.rm = na.rm) / sqrt(n)
}
The easiest things to analyze are the scores over blocks, so let's start with those…
#168 participants in total
idnum <- 168
#zero-padded suffixes matching how the json files are named ("001" .. "168")
format <- formatC(format = "d", 1:idnum, flag = "0", width = ceiling(log10(max(idnum))))
#collect one small frame per participant and bind once at the end
#(rbind-ing inside the loop regrows the frame on every iteration)
frames <- vector("list", idnum)
for (i in seq_len(idnum)) {
  #read this participant's json
  myjson <- fromJSON(paste0(foldername, "ppt", format[i], ".json"))
  #pay-off per block: first 5 entries of the per-move differences
  payoff <- mydiff(as.numeric(unlist(myjson$allPayoffCounts)))[1:5]
  #scalar id/condition recycle across the 5 block rows
  frames[[i]] <- data.frame(id = i, condition = myjson$condition,
                            block = 1:5, payoff = payoff)
}
dat <- do.call(rbind, frames)
#recode conditions
dat$cond<-mapvalues(dat$condition, 1:4, c("1-2", "1-2", "0.5-0.5", "0.5-0.5"))
#create a frame for plotting with means and standard errors per block and condition
dplot<-ddply(dat, ~cond+block, summarize, mu=mean(payoff), se=se(payoff))
#change condition to factor for plotting and type consitency
dplot$cond<-as.factor(dplot$cond)
#set limits
limits <- aes(ymax = mu + se, ymin=mu - se)
#dodge bars a little
pd <- position_dodge(.2)
#plot the means over blocks and condition
p<-ggplot(dplot, aes(x=block, y=mu, col=cond)) +
#error bars
geom_errorbar(aes(ymin=mu-se, ymax=mu+se), width=0.2, size=1, position=pd) +
#lines
geom_line(position=pd, size=1) +
#classic theme, legend on bottom
theme_classic()+theme(text = element_text(size=22, family="serif"), legend.position="bottom")+
ylab("Mean outcome")+xlab("Block")+
guides(col=guide_legend(title="Condition (beta distribution)"))+
#adjust text size
theme(panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black"),
panel.border = element_rect(colour = "black", fill=NA, size=2))
#show plot
print(p)
We can already see that the average score seems to be higher for the 0.5-0.5 group than for the 1-2 group. Additionally, scores go up over blocks in general. Thus, participants seem to get better over time in this task. This is nice as they seem to learn as time passes.
Let’s test if this is significant though.
#mixed-effects model of payoff on block and condition, with a random
#intercept and random block slope per participant (blocks nested in ids)
m<-lmer(payoff~block+cond+(block|id), data=dat)
#summary, this is where we look at the fixed effects
#(lmerMod reports t values without p values; |t| > 2 is the usual rough criterion)
summary(m)
## Linear mixed model fit by REML ['lmerMod']
## Formula: payoff ~ block + cond + (block | id)
## Data: dat
##
## REML criterion at convergence: -1472.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.6345 -0.6558 -0.0587 0.6000 4.5608
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## id (Intercept) 0.0016249 0.04031
## block 0.0001611 0.01269 -0.74
## Residual 0.0088469 0.09406
## Number of obs: 840, groups: id, 168
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.276125 0.009106 30.325
## block 0.008570 0.002495 3.435
## cond1-2 -0.027153 0.007827 -3.469
##
## Correlation of Fixed Effects:
## (Intr) block
## block -0.794
## cond1-2 -0.430 0.000
This cements the intuition derived from the previous plot statistically. Notice however that the scores for the two conditions are expected to be different a priori, i.e. they don’t have the same mean even if movements were performed at random.
Let’s check out if participants learn so differently in the different conditions that this even influences their scores in the test maps.
#collect the 8 test-map scores per participant; bind once at the end
#(avoids regrowing the frame via rbind on every iteration)
frames <- vector("list", idnum)
for (i in seq_len(idnum)) {
  #read this participant's json
  myjson <- fromJSON(paste0(foldername, "ppt", format[i], ".json"))
  #test-map scores: entries 6 to 13 of the per-move differences
  payoff <- mydiff(as.numeric(unlist(myjson$allPayoffCounts)))[6:13]
  #scalar id/condition recycle across the 8 rows
  frames[[i]] <- data.frame(id = i, condition = myjson$condition, payoff = payoff)
}
dat <- do.call(rbind, frames)
#recode condition names (1 & 2 -> "1-2", 3 & 4 -> "0.5-0.5")
dat$cond<-mapvalues(dat$condition, 1:4, c("1-2", "1-2", "0.5-0.5", "0.5-0.5"))
#mean test-map payoff and standard error per condition
dplot<-ddply(dat, ~cond,summarize, mean=mean(payoff), se=se(payoff))
#error-bar limits; reused by the geom_errorbar call below
limits <- aes(ymax = mean + se, ymin=mean - se)
#bar plot of mean test-map payoff per condition
p <- ggplot(dplot, aes(y=mean, x=cond, fill=cond)) +
#bars
geom_bar(position="dodge", stat="identity")+
#error bars (+/- 1 standard error)
geom_errorbar(limits, position="dodge", width=0.31)+
#points marking the means
geom_point(size=3)+
#axis label
xlab("Condition")+
#classic theme
theme_classic() +
#adjust text size
theme(text = element_text(size=22, family="serif"))+
#blank panel with a thick black border around the plotting area
theme(panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black"),
panel.border = element_rect(colour = "black", fill=NA, size=2))
print(p)
Even though there is a difference in performance during the open maps, there is no difference between the two groups in the test maps. I think this is good news as both groups seem to understand what to do in the test maps (more on this later).
One of the main predictions from the BAMCP-paper is the dwelling time of the algorithm between the different conditions. Dwelling is defined as staying in the same row or column for a longer time. Thus, we should check if this difference occurs within the open maps between conditions.
#dwelling measure: number of movement "runs", i.e. 1 plus the number of
#positions where the move differs from the previous one. More dwelling in
#a row/column means fewer direction changes and therefore a lower count.
mydwelling <- function(x) {
  #guard: one move (or none) is at most a single run
  #(the old 2:length(x) loop errored on length-1 input)
  if (length(x) < 2) {
    return(length(x))
  }
  #vectorized change-point count replaces the element-wise loop
  sum(x[-1] != x[-length(x)]) + 1
}
#dwelling counts for the 5 open maps per participant; bind once at the end
frames <- vector("list", idnum)
for (i in seq_len(idnum)) {
  #read this participant's json
  myjson <- fromJSON(paste0(foldername, "ppt", format[i], ".json"))
  #dwelling time for each of the 5 open maps
  dwell <- vapply(1:5,
                  function(k) as.numeric(mydwelling(unlist(myjson$allMovementTrackers[k]))),
                  numeric(1))
  #scalar id/condition recycle across the 5 rows
  frames[[i]] <- data.frame(id = i, condition = myjson$condition, dwell = dwell)
}
dat <- do.call(rbind, frames)
#recode condition (1 & 2 -> "1-2", 3 & 4 -> "0.5-0.5")
dat$cond<-mapvalues(dat$condition, 1:4, c("1-2", "1-2", "0.5-0.5", "0.5-0.5"))
#mean dwelling time and standard error per condition
dplot<-ddply(dat, ~cond,summarize, mean=mean(dwell), se=se(dwell))
#bar plot of mean dwelling time per condition
p <- ggplot(dplot, aes(y=mean, x=cond, fill=cond)) +
#bars
geom_bar(position="dodge", stat="identity")+
#error bars; `limits` (aes over mean/se) was defined in an earlier chunk
#and matches this dplot's column names
geom_errorbar(limits, position="dodge", width=0.31)+
#points marking the means
geom_point(size=3)+
#classic theme
theme_classic() +
#axis labels
xlab("Condition")+ylab("Dwelling time")+
#adjust text size
theme(text = element_text(size=22, family="serif"))+
#blank panel with a thick black border around the plotting area
theme(panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black"),
panel.border = element_rect(colour = "black", fill=NA, size=2))
#auto-prints the plot at top level
p
So this seems to go in the right direction, but it also looks like a relatively small difference. Let's see if this is also significant.
#plain linear regression of dwelling time on condition (ignores the
#repeated measures per participant, so treat it as a rough upper bound)
m<-lm(dwell~cond, data=dat)
#coefficient table; baseline level is the "0.5-0.5" condition
summary(m)
##
## Call:
## lm(formula = dwell ~ cond, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -34.007 -15.007 -0.102 12.898 58.993
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33.1024 0.9062 36.530 <2e-16 ***
## cond1-2 1.9048 1.2815 1.486 0.138
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 18.57 on 838 degrees of freedom
## Multiple R-squared: 0.002629, Adjusted R-squared: 0.001439
## F-statistic: 2.209 on 1 and 838 DF, p-value: 0.1376
Seems like it’s going in the right direction but not significant. The effect of dwelling time is definitely not as big as predicted by BAMCP. I don’t think that’s bad as I don’t really think participants behave as efficiently as a BAMCP here. Also might make it ok for us to drop coding up the full BAMCP-model and instead focus on out-of-the-box tree search algorithms.
Somehow the potato-vector (i.e., whether or not a potato was found on a given move) did not always get saved (it is recoverable though, we’re working on it). However, we can still easily look at other things such as the probability of potatoes over time. For this, I will first create and explain some higher level helper functions.
#Builds a 210 x 210 visit-count matrix for one walk: starting at the centre
#cell (106, 106), the current cell's counter is incremented before each move
#is applied. Move strings other than up/down/left/right leave the position
#unchanged, exactly as before.
getlocation <- function(movement) {
  visits <- matrix(0, nrow = 210, ncol = 210)
  row_i <- 106
  col_i <- 106
  for (step in movement) {
    #count the current cell, then apply the move
    visits[row_i, col_i] <- visits[row_i, col_i] + 1
    if (step == "up") {
      row_i <- row_i + 1
    } else if (step == "down") {
      row_i <- row_i - 1
    } else if (step == "right") {
      col_i <- col_i + 1
    } else if (step == "left") {
      col_i <- col_i - 1
    }
  }
  return(visits)
}
#Walks the movement vector exactly like getlocation, but returns the
#underlying p(potato) value (looked up in matrix m) at every visited cell,
#in visiting order.
getprobs <- function(movement, m) {
  visits <- matrix(0, nrow = 210, ncol = 210)
  row_i <- 106
  col_i <- 106
  probs <- numeric(length(movement))
  for (k in seq_along(movement)) {
    visits[row_i, col_i] <- visits[row_i, col_i] + 1
    #record the probability under the current position before moving
    probs[k] <- m[row_i, col_i]
    step <- movement[k]
    if (step == "up") {
      row_i <- row_i + 1
    } else if (step == "down") {
      row_i <- row_i - 1
    } else if (step == "right") {
      col_i <- col_i + 1
    } else if (step == "left") {
      col_i <- col_i - 1
    }
  }
  return(probs)
}
These two functions generate the trajectory on the map along which a participant walked and the probability vector, i.e. the underlying probs, over which the participant moved. So let's take it from here and do some further plotting.
#per-trial probabilities under each participant's paths across the 5 open maps
frames <- vector("list", idnum)
for (i in seq_len(idnum)) {
  #read this participant's json
  myjson <- fromJSON(paste0(foldername, "ppt", format[i], ".json"))
  probs <- numeric()
  numb <- numeric()
  for (j in 1:5) {
    #probabilities visited in block j (map = outer product of the parameters)
    p <- getprobs(myjson$allMovementTrackers[[j]],
                  outer(myjson$allColParameters[[j]], myjson$allRowParameters[[j]]))
    #BUG FIX: accumulate every block's probabilities. Previously only the
    #last block's p (length 100) was passed to the data frame and silently
    #recycled across all 500 rows, so all five blocks got block 5's values.
    probs <- c(probs, p)
    #trial numbers within block (assumes 100 moves per block, as before)
    numb <- c(numb, 1:100)
  }
  frames[[i]] <- data.frame(prob = probs, trial = numb, condition = myjson$condition,
                            block = rep(1:5, each = 100), id = i)
}
dat <- do.call(rbind, frames)
#recode conditions (1 & 2 -> "1-2", 3 & 4 -> "0.5-0.5")
dat$cond<-mapvalues(dat$condition, 1:4, c("1-2", "1-2", "0.5-0.5", "0.5-0.5"))
#mean visited probability and standard error per trial and condition
dplot<-ddply(dat, ~cond+trial, summarize, mu=mean(prob), se=se(prob))
#change condition to factor for plotting and type consistency
dplot$cond<-as.factor(dplot$cond)
#error-bar limits (the error bars below set their aes() directly)
limits <- aes(ymax = mu + se, ymin=mu - se)
#plot the mean probabilities over trials, one colored line per condition;
#`pd` (position_dodge) is reused from the first plotting chunk
p<-ggplot(dplot, aes(x=trial, y=mu, col=cond)) +
#error bars (+/- 1 standard error)
geom_errorbar(aes(ymin=mu-se, ymax=mu+se), width=0.2, size=1, position=pd) +
#lines
geom_line(position=pd, size=1) +
#classic theme, legend on bottom
theme_classic()+
theme(text = element_text(size=22, family="serif"), legend.position="bottom")+
ylab("Mean probability")+xlab("Trial")+
guides(col=guide_legend(title="Condition (beta distribution)"))+
#blank panel with a thick black border around the plotting area
theme(panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black"),
panel.border = element_rect(colour = "black", fill=NA, size=2))
#show plot
print(p)
The plot above shows the mean probabilities over time. It looks fairly noisy, but let’s do a really rough test. Note: you can also further bin the trials, i.e. means for 10 trials of time. This looks smoother but still no real linear trend.
#regression of visited probability on trial number.
#NOTE(review): despite the original comment calling this a logistic
#regression, no family argument is given, so glm() fits a *Gaussian*
#linear model here - the gaussian dispersion parameter in the output
#below confirms this. The estimated linear trend is what is interpreted.
m<-glm(prob~trial, data=dat)
#summarize
summary(m)
##
## Call:
## glm(formula = prob ~ trial, data = dat)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.25286 -0.19698 -0.07872 0.13287 0.76394
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.292e-01 1.667e-03 137.482 <2e-16 ***
## trial 2.369e-04 2.866e-05 8.268 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.05747753)
##
## Null deviance: 4831.9 on 83999 degrees of freedom
## Residual deviance: 4828.0 on 83998 degrees of freedom
## AIC: -1548.7
##
## Number of Fisher Scoring iterations: 2
So the odds of potatoes do go up (yet only slightly) over time. Participants seem to get a little better as trials go by. I guess it’s also ok if this is not too strong in any case, it’s a complicated task and sometimes randomness strikes.
We can expand on the analysis above and try and assess how many steps ahead participants might actually plan. I proposed to do this by assessing the k-steps ahead auto-correlation of the probability vector and a (crude) measure of uncertainty. For this, let me first create some helper functions.
#Crude exploration measure along a walk: at each step, the average of the
#share of all visits so far falling in the current row and the share
#falling in the current column. The m argument is kept for signature
#compatibility with getprobs/randprobs but is unused (as before).
getexplore <- function(movement, m) {
  #running per-row / per-column visit totals
  row_counts <- numeric(210)
  col_counts <- numeric(210)
  #start at the centre cell
  row_i <- 106
  col_i <- 106
  exploration <- rep(0, length(movement))
  for (i in seq_along(movement)) {
    #PERF FIX: the original recomputed getlocation(movement[1:i]) plus
    #rowSums/colSums over the full 210x210 map on every step (quadratic in
    #walk length). The running counters here are identical by construction,
    #and after i steps the grand total of visits is exactly i, so the
    #row/column shares are simply counts / i.
    row_counts[row_i] <- row_counts[row_i] + 1
    col_counts[col_i] <- col_counts[col_i] + 1
    exploration[i] <- 0.5 * row_counts[row_i] / i + 0.5 * col_counts[col_i] / i
    #updates as usual
    if (movement[i] == "up") {row_i <- row_i + 1}
    if (movement[i] == "down") {row_i <- row_i - 1}
    if (movement[i] == "right") {col_i <- col_i + 1}
    if (movement[i] == "left") {col_i <- col_i - 1}
  }
  return(exploration)
}
#Baseline probability vector for a random walker: same start (106, 106) and
#same number of steps as the participant's movement vector, but each step
#is drawn uniformly from the four directions. Only length(movement) matters,
#not its content.
randprobs <- function(movement, m) {
  visits <- matrix(0, nrow = 210, ncol = 210)
  row_i <- 106
  col_i <- 106
  probs <- numeric(length(movement))
  for (k in seq_along(movement)) {
    visits[row_i, col_i] <- visits[row_i, col_i] + 1
    probs[k] <- m[row_i, col_i]
    #one uniform draw per step (same RNG consumption as before)
    step <- sample(c("up", "down", "left", "right"), 1)
    if (step == "up") {
      row_i <- row_i + 1
    } else if (step == "down") {
      row_i <- row_i - 1
    } else if (step == "right") {
      col_i <- col_i + 1
    } else if (step == "left") {
      col_i <- col_i - 1
    }
  }
  return(probs)
}
#Baseline exploration vector for a random walker (cf. getexplore): average
#of the current row's and current column's share of all visits so far, with
#each step drawn uniformly at random. The m argument is unused (as before).
randexplore <- function(movement, m) {
  #running per-row / per-column visit totals
  row_counts <- numeric(210)
  col_counts <- numeric(210)
  row_i <- 106
  col_i <- 106
  exploration <- rep(0, length(movement))
  for (i in seq_along(movement)) {
    #PERF FIX: running counters replace rowSums/colSums over the whole
    #210x210 map at every step; after i visits the grand total is exactly i
    row_counts[row_i] <- row_counts[row_i] + 1
    col_counts[col_i] <- col_counts[col_i] + 1
    exploration[i] <- 0.5 * row_counts[row_i] / i + 0.5 * col_counts[col_i] / i
    #random move (one draw per step, preserving the RNG stream)
    move <- sample(c("up", "down", "left", "right"), 1)
    if (move == "up") {row_i <- row_i + 1}
    if (move == "down") {row_i <- row_i - 1}
    if (move == "right") {col_i <- col_i + 1}
    if (move == "left") {col_i <- col_i - 1}
  }
  return(exploration)
}
I can use these functions now to compare the lag of significant auto-correlations within the vectors created by participants and a random mover. If there is a significant auto-correlation that is bigger than chance and goes k steps back, then this means that, for example, rewards that are k steps away influence present moves.
#initialize accumulator vectors for empirical/random ACFs and lag numbers
correlation<-randcor<-exploration<-randexp<-numb<-numeric()
#loop through participants and their 5 open maps
for (i in 1:idnum){
myjson<-fromJSON(paste0(foldername, "ppt", format[i], ".json"))
for (j in 1:5){
#probability vector under the participant's walk
p<-getprobs(myjson$allMovementTrackers[[j]], outer(myjson$allColParameters[[j]], myjson$allRowParameters[[j]]))
#probability vector under a random walk of the same length (baseline)
prand<-randprobs(myjson$allMovementTrackers[[j]], outer(myjson$allColParameters[[j]], myjson$allRowParameters[[j]]))
#exploration (certainty) vector under the participant's walk
e<-getexplore(myjson$allMovementTrackers[[j]], outer(myjson$allColParameters[[j]], myjson$allRowParameters[[j]]))
#exploration vector under a random walk (baseline)
erand<-randexplore(myjson$allMovementTrackers[[j]], outer(myjson$allColParameters[[j]], myjson$allRowParameters[[j]]))
#auto-correlations at lags 1..10 (acf[1] is lag 0, hence the 2:11 slice).
#NOTE(review): the empirical vectors are reversed via rev() before acf
#while the random baselines are not - presumably intentional (correlating
#backwards from the end of the walk), but worth confirming.
correlation<-c(correlation, acf(rev(p), plot=FALSE)$acf[2:11])
exploration<-c(exploration, acf(rev(e), plot=FALSE)$acf[2:11])
randcor<-c(randcor, acf(prand, plot=FALSE)$acf[2:11])
randexp<-c(randexp, acf(erand, plot=FALSE)$acf[2:11])
#lag numbers
numb<-c(numb, 1:10)
}
}
#data frame of auto-correlations per lag (one row per participant x map x lag)
d <- data.frame(cor = correlation, rcor = randcor, exp = exploration, rexp = randexp, num = numb)
#means per lag.
#BUG FIX: the summarize call must refer to d's columns rcor/rexp. The
#original used mean(randcor)/mean(randexp), which are NOT columns of d and
#silently resolved to the full-length global vectors, producing one overall
#mean repeated for every lag instead of per-lag means.
dplot <- ddply(d, ~num, summarize, cor = mean(cor), rcor = mean(rcor), rex = mean(exp), randex = mean(rexp))
#restack into long format for the faceted plot below
dplot <- data.frame(horizon = rep(1:10, 4),
                    correlation = c(dplot$cor, dplot$rcor, dplot$rex, dplot$randex),
                    Method = rep(rep(c("Empirical", "Random"), each = 10), 2),
                    Value = rep(c("Expectation", "Certainty"), each = 20))
#a horizon of up to 5 lags is enough for the plot
dplot <- subset(dplot, horizon < 6)
#faceted plot: empirical vs random ACF by horizon for expectation/certainty
p <- ggplot(dplot, aes(x=horizon, y=correlation, col=Method)) +
#lines
geom_line(stat="identity")+
#fix the y range to 0..0.8
ylim(c(0,0.8)) +
#points on top of the lines
geom_point(size=1)+
#facet by value type (expectation vs certainty), free y scales
facet_wrap(~Value, scales = "free_y")+
#theme and titles
theme_classic() +xlab("Horizon")+ylab("ACF")+
ggtitle("Planning horizon")+
theme(text = element_text(size=20, family="serif"),
strip.background = element_blank(),
plot.title = element_text(size=20),
legend.position = "bottom")
print(p)
So this seems to suggest that a horizon of about k=3-4 affects participants’ moves. However, we will have to assess this by using the tree search algorithms later on. Another possibility could be to do the same thing with the empirical probs (will need the potato vector etc.), the empirical uncertainty and a UCB vector. This could then also be done with arima-x models actually.
Let’s focus on the test maps. We want to know if the obstacles (restricted vs. unrestricted) and the information (high vs. low) influenced participants behavior.
Our hypotheses were: A: more information leads to better planning and thus to better scores and B: More physical restrictions in the map lead to better planning and thus to better scores. Let’s first recode the data as it’s saved in a slightly complicated manner.
#fixed 3x3 grid of direction labels around the centre cell; hoisted out of
#the loops since it never changes
direction_grid <- t(matrix(c("no", "up", "no", "left", "no", "right", "no", "down", "no"),
                           nrow = 3, ncol = 3))
#one frame per participant, bound once at the end
frames <- vector("list", idnum)
for (i in seq_len(idnum)) {
  myjson <- fromJSON(paste0(foldername, "ppt", format[i], ".json"))
  maps <- character(8)
  infos <- character(8)
  trap <- logical(8)
  for (j in 1:8) {
    #probability map of test map j via the outer product of its parameters
    m <- outer(myjson$allRowParameters[[5 + j]], myjson$allColParameters[[5 + j]])
    #where, relative to the centre, the 0.8-probability bait sits
    hits <- direction_grid[m[11:13, 11:13] == 0.8]
    bait <- hits[hits != "no"]
    #high or low info, roughly assessed via total explored rows/cols
    info <- ifelse(sum(myjson$allExploredRows[[5 + j]]) + sum(myjson$allExploredCols[[5 + j]]) > 200,
                   "high", "low")
    #map labels as set up by Moritz. NA (instead of a stale value carried
    #over from the previous iteration) if no combination matches.
    map <- NA_character_
    if (bait == "right" && info == "high") {map <- "map1"}
    if (bait == "left" && info == "low") {map <- "map2"}
    if (bait == "left" && info == "high") {map <- "map3"}
    if (bait == "right" && info == "low") {map <- "map4"}
    if (bait == "down" && info == "high") {map <- "map5"}
    if (bait == "up" && info == "low") {map <- "map6"}
    if (bait == "up" && info == "high") {map <- "map7"}
    if (bait == "down" && info == "low") {map <- "map8"}
    #did the very first move go towards the bait?
    trap[j] <- myjson$allMovementTrackers[[5 + j]][1] == bait
    maps[j] <- map
    infos[j] <- info
  }
  #total potato counts in the 8 test maps
  outcome <- as.numeric(unlist(myjson$allPotatoCounts[6:13]))
  frames[[i]] <- data.frame(id = i, map = maps, info = infos, outcome = outcome, trap = trap)
}
dat <- do.call(rbind, frames)
#maps 1-2 and 5-6 were restricted, maps 3-4 and 7-8 unrestricted (by setup)
dat$res <- mapvalues(dat$map, paste0("map", 1:8),
                     rep(rep(c("restricted", "unrestricted"), each = 2), 2))
Now we can use this data frame to test if restriction and information influenced the score in the test maps.
#linear model: do information level and restrictedness predict test-map scores?
m1<-lm(outcome~info+res, data=dat)
#coefficient table (reference levels: high information, restricted maps)
summary(m1)
##
## Call:
## lm(formula = outcome ~ info + res, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.7932 -1.2187 0.2068 1.2068 4.7812
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.79315 0.07612 49.831 < 2e-16 ***
## infolow -0.19643 0.08790 -2.235 0.0256 *
## resunrestricted -0.37798 0.08790 -4.300 1.83e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.611 on 1341 degrees of freedom
## Multiple R-squared: 0.01721, Adjusted R-squared: 0.01575
## F-statistic: 11.74 on 2 and 1341 DF, p-value: 8.791e-06
Cool! Low information and less restrictions lead to lower scores. Thus, these two hypotheses can be confirmed.
Let’s check if these variables also influence the probability to go for the bait.
#logistic regression: do information level and restrictedness influence the
#probability that the first move goes towards the bait?
m2<-glm(trap~info+res, data=dat, family="binomial")
summary(m2)
##
## Call:
## glm(formula = trap ~ info + res, family = "binomial", data = dat)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.1914 -1.0394 -0.8844 1.3219 1.5020
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.33356 0.09616 -3.469 0.000523 ***
## infolow 0.36644 0.11180 3.278 0.001047 **
## resunrestricted -0.40328 0.11181 -3.607 0.000310 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1823.6 on 1343 degrees of freedom
## Residual deviance: 1799.9 on 1341 degrees of freedom
## AIC: 1805.9
##
## Number of Fisher Scoring iterations: 4
Less so. Seems like less restrictions make it less likely to go for the bait. I think this is quite intuitive as there are simply more options with less restrictions, i.e. more direction to go.
#remove what we've stored so far (this is just to avoid further clashes and save memory)
#NOTE(review): rm(list=ls()) wipes every object in the workspace, so
#everything the heatmap section needs is redefined below; attached packages
#survive. This pattern is generally discouraged in scripts.
rm(list=ls())
#visit-count map for the 23x23 test maps, walk starting at the centre
#cell (12, 12); same logic as the earlier 210x210 version
getlocation <- function(movement) {
  visits <- matrix(0, nrow = 23, ncol = 23)
  row_i <- 12
  col_i <- 12
  for (step in movement) {
    #count the current cell, then apply the move
    visits[row_i, col_i] <- visits[row_i, col_i] + 1
    if (step == "up") {
      row_i <- row_i + 1
    } else if (step == "down") {
      row_i <- row_i - 1
    } else if (step == "right") {
      col_i <- col_i + 1
    } else if (step == "left") {
      col_i <- col_i - 1
    }
  }
  return(visits)
}
#participant indices and their zero-padded file-name suffixes ("001" .. "168")
x <- seq_len(168)
format <- formatC(x, format = "d", flag = "0", width = ceiling(log10(max(x))))
#a list of 8 visit-count accumulators, one per test map
m <- rep(list(matrix(0, nrow = 23, ncol = 23)), 8)
#accumulate each test map's visit counts in m across all participants.
#Direction labels around the centre; hoisted out of the loops since the
#original rebuilt this constant matrix 168 x 8 times.
direction_grid <- t(matrix(c("no", "up", "no", "left", "no", "right", "no", "down", "no"),
                           nrow = 3, ncol = 3))
#map order as set up: bait direction crossed with information level
map_keys <- c("right high", "left low", "left high", "right low",
              "down high", "up low", "up high", "down low")
for (i in seq_len(168)) {
  #json
  myjson <- fromJSON(paste0("/home/hanshalbe/bamcp/ppt", format[i], ".json"))
  #loop through test maps
  for (j in 1:8) {
    #probability map of test map j
    ma <- outer(myjson$allRowParameters[[5 + j]], myjson$allColParameters[[5 + j]])
    #locate the 0.8-probability bait relative to the centre
    hits <- direction_grid[ma[11:13, 11:13] == 0.8]
    bait <- hits[hits != "no"]
    #high or low information, roughly assessed
    info <- ifelse(sum(myjson$allExploredRows[[5 + j]]) + sum(myjson$allExploredCols[[5 + j]]) > 200,
                   "high", "low")
    #look up which of the 8 maps this is (replaces eight duplicated if
    #statements) and add the walk's visit counts; skip if unidentified
    k <- match(paste(bait, info), map_keys)
    if (!is.na(k)) {
      m[[k]] <- m[[k]] + getlocation(myjson$allMovementTrackers[[5 + j]])
    }
  }
}
#plotting function to avoid repeating the same code 8 times:
#overlays the visit-count heatmap of test map k on its png image.
#k: index (1..8) into the global list m of visit-count matrices.
#title: plot title string. Returns a ggplot object.
myplot<-function(k, title){
#get png image, this is linked to where I've stored them
img <- readPNG(paste0("/home/hanshalbe/Documents/newimage", k, ".png"))
#get a raster with interpolation for this image, stretched over the panel
g <- rasterGrob(img, interpolate=TRUE, width=unit(1,"npc"), height=unit(1,"npc"))
#central 7x7 window of counts around the start cell (12, 12)
map<-m[[k]][9:15, 9:15]
#long-format frame for ggplot; as.vector is column-major, so y (rows)
#varies fastest and x is constant within each group of 7
dp<-data.frame(x=rep(-3:3 ,each=7), y=rep(-3:3, 7), count=as.vector(map))
#the max is always the starting point, we won't color it as this would be misleading
dp$count<-ifelse(dp$count==max(dp$count), NA, dp$count)
#cells with 0 counts won't be plotted either
dp$count<-ifelse(dp$count==0, NA, dp$count)
#start to plot with tile fills determined by counts
p <- ggplot(dp, aes(x = x, y = y, fill = count))+
#draw the png underneath, stretched over the whole plot
annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf)+
#lower alpha of heatmap such that the png is still recognizable
geom_tile(alpha=0.4)+ theme_bw()+
#palette
scale_fill_distiller(palette = "Spectral")+
#expand over whole plot
scale_x_continuous(expand = c(0, 0))+
#NOTE(review): y is continuous (-3..3) but a discrete scale is used here;
#scale_y_continuous(expand = c(0, 0)) was probably intended - confirm
#against the rendered plots
scale_y_discrete(expand = c(0, 0))+ guides(fill=FALSE)+
#strip all axis decoration, add the title
theme(strip.background = element_blank(),
axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank())+ggtitle(title)
#return the plot
return(p)
}
The function above enables us to produce the heatmaps and overlay them on the test maps. This will provide the same info as the tests before but in a nicer visual way. Thus, I will close with those plots here.
#render the eight heatmaps; titles pair each map's restriction level with
#its information level (maps 1-2 & 5-6 restricted, 3-4 & 7-8 unrestricted,
#matching the recoding used for the models above)
myplot(1, "Restricted space, high information")
myplot(2, "Restricted space, low information")
myplot(3, "Unrestricted space, high information")
myplot(4, "Unrestricted space, low information")
myplot(5, "Restricted space, high information")
myplot(6, "Restricted space, low information")
myplot(7, "Unrestricted space, high information")
myplot(8, "Unrestricted space, low information")